#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time    : 2017/7/31 17:52
@Author  : tonychao
@File    : DBReader.py
功能：从数据库读取历史运行记录
理论上应该是唯一的数据来源
"""


import MySQLdb
import numpy as np
"""
db= MySQLdb.connect(
        host='192.168.190.134',
        port = 3306,
        user='tonychao',
        passwd='sql',
        db ='sparkTuningDB',
        )


db= MySQLdb.connect(
        host='127.0.0.1',
        port = 3306,
        user='tonychao',
        passwd='sql',
        db ='sparkTuningDB',
        )
"""
"""
db= MySQLdb.connect(
        host='192.168.1.50',
        port = 3306,
        user='tonychao',
        passwd='sql',
        db ='sparkTuningDB',
        )
"""
db= MySQLdb.connect(
        host='10.13.30.18',
        port = 32100,
        user='tonychao',
        passwd='sql',
        db ='sparkTuningDB',
        )
"""
"""

'''
此函数用于读取建立机器学习模型所需的数据
输入： app_md5 该提交文件的MD5值
输出：retList=[inputConf,runTimeList,totalRunTime,len(runTimeList)]
inputConf；numpy 2d
runTimeList;numpy 2d
totalRunTime;numpy 1d
'''

def readDB (app_md5,app_class):
    db.query("SELECT *  FROM run_record WHERE app_md5= \'"+app_md5+"\'and app_class=\'"+app_class+"\' and total_run_time_ms !='-1';")
    r=db.store_result()
    run_recordTuple= r.fetch_row(how=1,maxrows=100000)     #it is zero which means, return as a tuple. how=1 means, return it as a dictionary

    runTimeList=[];totalRunTime=[]
    inputConf1=[];inputConf2=[];inputConf3=[];
    runTime1=[]; runTime2=[]; runTime3=[];

    for run_record in run_recordTuple:

        # 第一组 运行参数
        str= run_record['group1_conf']
        tmpConfstr=str.split('\t')[:-1]
        tmpConf=[]
        for str in tmpConfstr:
            tmpConf.append(eval(str))
        inputConf1.append(tmpConf)

        # 第二组 运行参数
        str= run_record['group2_conf']
        tmpConfstr=str.split('\t')[:-1]
        tmpConf=[]
        for str in tmpConfstr:
            tmpConf.append(eval(str))
        inputConf2.append(tmpConf)

        # 第三组 运行参数
        str= run_record['group3_conf']
        tmpConfstr=str.split('\t')[:-1]
        tmpConf=[]
        for str in tmpConfstr:
            tmpConf.append(eval(str))
        inputConf3.append(tmpConf)

        #totalRunTime 总运行时间
        str= run_record['total_run_time_ms']
        totalRunTime.append(long(str))

        # 每个Group的运行时间
        runTime1.append(long( run_record['group1_time_ms']))
        runTime2.append(long( run_record['group2_time_ms']))
        runTime3.append(long( run_record['group3_time_ms']))

    return  [inputConf1,inputConf2,inputConf3,runTime1,runTime2,runTime3,totalRunTime]



#readDB ('250a21b717a695b767f40a831f4d4345')

